library(readstata13)
library(dplyr)

## Load the raw Harris 1968 National Malaise Survey (study no. 1813).
## The path is relative: the working directory must be set to the
## replication repository before running this script.
survey <- read.dta13("Harris_Data/Harris 1968 National Malaise Survey, study no. 1813/harris_s1813_spss.dta")

## pid: sequential respondent id, one per row.
## seq_len() is used instead of 1:nrow() so that an empty data set gives an
## empty id vector rather than c(1, 0).
survey$pid <- seq_len(nrow(survey))

## study: Harris study number, stored as text
survey$study <- "1813"

## year: survey year
survey$year <- 1968

## urban: filled with NA for this study
survey$urban <- NA

## region: filled with NA for this study
survey$region <- NA

# Each section below copies one raw survey item into a harmonised column as
# text; the summary()/str()/class() calls only print diagnostics.

# hh: respondent name item (F7B)
survey[["hh"]] <- as.character(survey$F7B)
summary(survey$F7B)
summary(survey[["hh"]])

# inequality: item Q6_1; inequality.variable is set to 1 for this study
survey[["inequality"]] <- as.character(survey$Q6_1)
survey[["inequality.variable"]] <- 1
summary(survey$Q6_1)

# union membership: F3_1 = self, F3_2 = other family member,
# F3_3 = no member in family (inspected only, not carried forward)
survey[["union.self"]] <- as.character(survey$F3_1)
survey[["union.other"]] <- as.character(survey$F3_2)
summary(survey$F3_1)
summary(survey$F3_2)
summary(survey$F3_3)

# employed: item Q11A
survey[["employed"]] <- as.character(survey$Q11A)
summary(survey$Q11A)
str(survey[["employed"]])

# employed.self: filled with NA for this study
survey[["employed.self"]] <- NA
str(survey[["employed.self"]])

# occupation: item Q11B
survey[["occupation"]] <- as.character(survey$Q11B)
summary(survey$Q11B)
str(survey[["occupation"]])

# occupation.self: filled with NA for this study
survey[["occupation.self"]] <- NA

# hhsize: household size item (F7A)
survey[["hhsize"]] <- as.character(survey$F7A)
summary(survey$F7A)
class(survey[["hhsize"]])

# educ: education item (F11B)
survey[["educ"]] <- as.character(survey$F11B)
summary(survey$F11B)
str(survey[["educ"]])

# income: item F14
survey[["income"]] <- as.character(survey$F14)
summary(survey$F14)
str(survey[["income"]])

## age: recorded in two raw columns, F13_1 and F13_2
summary(survey$F13_1)
summary(survey$F13_2)
# number of respondents with no age in either column
sum(is.na(survey$F13_1) & is.na(survey$F13_2))

# Start from F13_1 as text and, wherever F13_1 is missing, fill in the
# corresponding F13_2 value (also as text).
survey$age <- replace(
  as.character(survey$F13_1),
  is.na(survey$F13_1),
  as.character(survey$F13_2)[is.na(survey$F13_1)]
)

str(survey$age)

## race: item F16, stored as text
survey[["race"]] <- as.character(survey$F16)
summary(survey$F16)
table(survey[["race"]])

## party: filled with NA for this study
survey[["party"]] <- NA

## ideology: filled with NA for this study
survey[["ideology"]] <- NA


# gender: derived from which of the two age columns is filled in
summary(survey$F13_1)
summary(survey$F13_2)
# number of respondents with a value in BOTH columns; these rows are
# labelled "Male" below because the F13_1 check takes precedence
sum(!is.na(survey$F13_2) & !is.na(survey$F13_1))

# Vectorised recode (replaces a row-by-row loop over 1:nrow(survey), which
# breaks on an empty data set):
#   F13_1 present                  -> "Male"
#   F13_1 missing, F13_2 present   -> "Female"
#   both missing                   -> NA
# NA_character_ keeps the column character even if no row is labelled.
survey$gender <- ifelse(
  !is.na(survey$F13_1),
  "Male",
  ifelse(!is.na(survey$F13_2), "Female", NA_character_)
)
summary(survey$gender)
table(survey$gender)

## religion: item F12, stored as text
survey[["religion"]] <- as.character(survey$F12)
summary(survey$F12)
class(survey[["religion"]])

## factual items: all three filled with NA for this study
survey[c("factual1", "factual2", "factual3")] <- NA

## alienation index items (Q6_2, Q6_4, Q6_10), stored as text
survey[["dontcare"]] <- as.character(survey$Q6_2)
survey[["dontcount"]] <- as.character(survey$Q6_4)
survey[["leftout"]] <- as.character(survey$Q6_10)

## placement of question
survey[["question_place"]] <- "no_party"


### Assemble the harmonised data set: keep only the standardised columns
### created above, in a fixed order.

survey_1813 <- survey[, c(
  # identifiers
  "pid", "study", "year", "urban", "region", "hh",
  # outcome
  "inequality", "inequality.variable",
  # union membership and work
  "union.self", "union.other",
  "employed", "employed.self", "occupation", "occupation.self",
  # demographics
  "hhsize", "educ", "income", "age", "race",
  "party", "ideology", "gender", "religion",
  # factual items and alienation index
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  # question placement
  "question_place"
)]


## Save the data set (path is relative, so the working directory must be set
## to the replication repository). Left disabled, as in the original script.
#saveRDS(survey_1813, file = "Harris_Data/survey_1813.rds")
